/*
Produces the quarterly drug panel used in the core anti-cholesterol analysis
*/





/*
Create a balanced panel (user list)
*/

forvalues year = 1996/2013 {

	clear
	use ccaea`year' 

	keep enrolid
	egen t = tag(enrolid)
	keep if t == 1
	drop t

	save enrol_list`year', replace
}



clear
use enrol_list2003

forvalues year = 2004/2012 {

	merge 1:1 enrolid using enrol_list`year'
	keep if _merge == 3
	drop _merge 

}

display _N
save panel_enrolid_2003_2012, replace // 2.45 million






/*
Create a list of all cholesterol drug users (including those diagnosed but not taking)
*/


clear
gen enrolid = 0
gen times = 0
save cholesterol_first_diagnosis, replace


forvalues j=1996/2009 {
cd "/disk/aging/mktscan/nongeo/data/100pct"
clear
display `j'

use enrolid svcdate dx1 dx2 using ccaeo`j'
//capture use enrolid svcdate DX1 DX2 using ccaeo`j'
//capture rename DX1 dx1
//capture rename DX2 dx2

keep if dx1 == "2720" | dx2 == "2720"
gen times = 1
keep enrolid svcdate times

append using cholesterol_first_diagnosis

collapse (min) svcdate (sum) times, by(enrolid)
save cholesterol_first_diagnosis, replace

}

// different variable names
forvalues j=2010/2013 {
clear
display `j'

//use enrolid svcdate dx1 dx2 using ccaeo`j' // for sample runs
use enrolid svcdate DX1 DX2 using ccaeo`j'
rename DX1 dx1
rename DX2 dx2

keep if dx1 == "2720" | dx2 == "2720" // do we want to include other 272x?
gen times = 1
keep enrolid svcdate times

append using cholesterol_first_diagnosis

collapse (min) svcdate (sum) times, by(enrolid)
save cholesterol_first_diagnosis, replace

}


clear
use cholesterol_first_diagnosis

rename svcdate first_diagnosis_date

save cholesterol_first_diagnosis, replace







// combined claims 
clear
use user_quarter_choice_cholesterol

keep enrolid
duplicates drop

append using cholesterol_first_diagnosis
keep enrolid
duplicates drop

save cholesterol_all_relevant_users, replace


// find last quarter in which these users are in the data
clear
use all_enrolid_info

merge 1:1 enrolid using cholesterol_all_relevant_users
keep if _merge == 3
drop _merge

gen end_date = mdy(1,1,end_year+1) - 1

keep enrolid end_date

save enrolid_end_quarter_cholesterol_relevant, replace






/*
Extract additional health information related to each cholesterol drug user
- 1) inpatient 2) outpatient 3) number of drugs
*/

set more off


clear
gen totpay = 0
save cholesterol_user_inpatient_events_quarter_aggregate, replace


forvalues j=1996/2013 {
	clear
	display `j'

	// ccaei is inpatient admissions
	// ccaes is inpatient services (4 diagnosis codes)
	use enrolid admdate days dx* totpay using ccaei`j' // could add more variables

	merge m:1 enrolid using cholesterol_all_relevant_users, keep(3)
	drop _merge

	// from earlier exploration in 2-health under balanced
	gen heart_related = ///
	(dx1 == "41400") | ///
	(dx1 == "41401") | ///
	(dx1 == "41071") | ///
	(dx1 == "78659") | ///
	(dx1 == "27801") | ///
	(dx1 == "42731") | ///
	(dx1 == "42789") | ///
	(dx1 == "43491") | ///
	(dx1 == "78650")

	gen quarter = qofd(admdate)

	gen inpatient_events = 1
	collapse (count) inpatient_events (sum) heart_related days totpay, by(enrolid quarter)

	rename totpay inpatient_spending

	drop if enrolid == .

	append using cholesterol_user_inpatient_events_quarter_aggregate
	save cholesterol_user_inpatient_events_quarter_aggregate, replace
}




//////////////////////////
// Outpatient aggregate //
//////////////////////////

clear
gen quarter = 0
save cholesterol_user_outpatient_user_aggregate, replace



forvalues j=1996/2013 {
	clear
	use enrolid svcdate pay using ccaeo`j'

	merge m:1 enrolid using cholesterol_all_relevant_users, keep(3)
	drop _merge

	gen quarter = qofd(svcdate)

	drop if enrolid == .
	collapse (sum) pay, by(enrolid quarter)
	
	append using cholesterol_user_outpatient_user_aggregate
	save cholesterol_user_outpatient_user_aggregate, replace
}

rename pay outpatient_spending

save cholesterol_user_outpatient_user_aggregate, replace



//////////////////////////////////
// also get the number of drugs //
//////////////////////////////////

clear
gen quarter = 0
save cholesterol_user_drugs_user_aggregate, replace



forvalues j=1996/2013 {

	clear
	use enrolid genind ndcnum svcdate pay using ccaed`j'
	
	merge m:1 enrolid using "cholesterol_all_relevant_users", keep(3)
	drop _merge
	
	merge m:1 ndcnum using "compressed_redbook_v1", keepus(generic_id) keep(mat)
	drop _merge
	
	merge m:1 ndcnum using "compressed_redbook_w_indications", keepus(generic_id indic_id xr) keep(1 3)
	drop _merge
	
	gen quarter = qofd(svcdate)

	egen distinct_drugs = tag(enrolid quarter generic_id)
	gen non_cholesterol = distinct_drugs
	replace non_cholesterol = 0 if indic_id == 5
	
	gen non_cholesterol_pay = pay * non_cholesterol

	drop if enrolid == .

	collapse (sum) distinct_drugs non_cholesterol pay non_cholesterol_pay, by(enrolid quarter)
	
	append using cholesterol_user_drugs_user_aggregate
	save cholesterol_user_drugs_user_aggregate, replace
}

rename pay prescription_spending

save cholesterol_user_drugs_user_aggregate, replace





/*
Create a list of cholesterol users that start during a quarter in which they are hospitalized
*/


clear
use user_quarter_choice_cholesterol


// focus on cholesterol
//tab indic_id
keep if indic_id == 5


// BASIC: LOOK AT IMPACT OF FIRST CHOICE ON LATER CHOICE 
collapse (min) quarter, by(enrolid)

display _N  // 10 mill

merge 1:1 enrolid quarter using cholesterol_user_inpatient_events_quarter_aggregate
keep if _merge == 3
drop _merge

keep enrolid 
duplicates drop

display _N // 536k

save user_starting_hospital, replace // still 536k people




/*
Create plan characteristics associated with each user in a given year
*/


///////////
// SETUP //
///////////
clear
set more off
use enrolid plantyp using ccaed1996 // enrolid genind ndcnum svcdate using 

gen claims = 1
collapse (sum) claims, by(enrolid plantyp)
gsort enrolid - claims
egen t = tag(enrolid)
keep if t == 1
drop t

gen year = 1996

save plantype_info, replace


forvalues j=1997/2013 {
	clear
	use enrolid plantyp using ccaed`j' // enrolid genind ndcnum svcdate using 

	gen claims = 1
	collapse (sum) claims, by(enrolid plantyp)
	gsort enrolid - claims
	egen t = tag(enrolid)
	keep if t == 1
	drop t
	
	gen year = `j'
	
	append using plantype_info
	sleep 3000
	save plantype_info, replace
}


